DSAN-5200 Lab-5

Author

JaeHo Bahng

Import module / Set options and theme
# Imports deduplicated (numpy/pandas were imported three times, matplotlib and
# plotly.graph_objects twice) and grouped: stdlib first, then third-party.
import warnings
import xml.etree.ElementTree as ET

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pingouin as pg
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
from plotly.subplots import make_subplots
from scipy.stats import ttest_rel, zscore
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from statsmodels.stats.weightstats import ttest_ind


# Silence library warnings (pandas chained-assignment etc.) in notebook output
warnings.filterwarnings("ignore")

# Show every column and 10 decimal places when rendering DataFrames
pd.set_option('display.max_columns', None)
pd.set_option('display.precision', 10)
Import cleaned data
# Load the pre-cleaned dataset and tag each unique (long, lat) coordinate
# pair with a 1-based location identifier.
df = pd.read_csv('./data/cleaned_df.csv')
coordinate_groups = df.groupby(['long', 'lat'])
df['Location_ID'] = coordinate_groups.ngroup() + 1

Manipulate Dataset

Average Scenarios

The Average Scenarios dataset averages all the numerical columns of the scenarios into one row, outputting one row for each Location, Year, and RCP. This dataset is used when conducting EDA and visualizing overall trends.

Clean data (Average Scenarios)
# --- Build the "Average Scenarios" dataset ---------------------------------
# Average the numeric columns across all scenarios, yielding one row per
# Location / Year / RCP combination (used for EDA and trend visualization).
group_list = ['Park', 'long', 'lat', 'veg', 'year', 'TimePeriod', 'RCP','treecanopy', 'Ann_Herb', 'Bare', 'Herb', 'Litter', 'Shrub', 'El', 'Sa','Cl', 'RF', 'Slope', 'E', 'S']
veg_location = df.drop(labels='scenario', axis=1).groupby(group_list).mean().reset_index()

# 'RCP' mixes numeric strings ('4.5', '8.5') with the label 'historical'.
# Convert what parses to numbers; keep the non-numeric labels unchanged.
numeric_series = pd.to_numeric(veg_location['RCP'], errors='coerce')
veg_location['RCP'] = numeric_series.fillna(veg_location['RCP'])

four = veg_location[veg_location['RCP'].isin([4.5])]
eight = veg_location[veg_location['RCP'].isin([8.5])]
# Duplicate the historical rows into both RCP branches so each projection
# carries its own copy of the shared historical record. .copy() makes the
# relabelling below write to a real frame instead of a slice view
# (avoids SettingWithCopyWarning / a silently ignored assignment).
four_h = veg_location[veg_location['RCP'].isin(['historical'])].copy()
four_h['RCP'] = 4.5
eight_h = veg_location[veg_location['RCP'].isin(['historical'])].copy()
eight_h['RCP'] = 8.5

df_con = pd.concat([four_h, four, eight_h, eight], ignore_index=True)
# 1-based ID per unique (long, lat) coordinate pair
df_con['Location_ID'] = df_con.groupby(['long', 'lat']).ngroup() + 1

df_con.head(5)
Park long lat veg year TimePeriod RCP treecanopy Ann_Herb Bare Herb Litter Shrub El Sa Cl RF Slope E S T_P_Corr DrySoilDays_Winter_top50 DrySoilDays_Spring_top50 DrySoilDays_Summer_top50 DrySoilDays_Fall_top50 DrySoilDays_Winter_whole DrySoilDays_Spring_whole DrySoilDays_Summer_whole DrySoilDays_Fall_whole Evap_Winter Evap_Spring Evap_Summer Evap_Fall ExtremeShortTermDryStress_Winter_top50 ExtremeShortTermDryStress_Spring_top50 ExtremeShortTermDryStress_Summer_top50 ExtremeShortTermDryStress_Fall_top50 ExtremeShortTermDryStress_Winter_whole ExtremeShortTermDryStress_Spring_whole ExtremeShortTermDryStress_Summer_whole ExtremeShortTermDryStress_Fall_whole FrostDays_Winter FrostDays_Spring FrostDays_Summer FrostDays_Fall NonDrySWA_Winter_top50 NonDrySWA_Spring_top50 NonDrySWA_Summer_top50 NonDrySWA_Fall_top50 NonDrySWA_Winter_whole NonDrySWA_Spring_whole NonDrySWA_Summer_whole NonDrySWA_Fall_whole PET_Winter PET_Spring PET_Summer PET_Fall PPT_Winter PPT_Spring PPT_Summer PPT_Fall SemiDryDuration_Annual_top50 SemiDryDuration_Annual_whole SWA_Winter_top50 SWA_Spring_top50 SWA_Summer_top50 SWA_Fall_top50 SWA_Winter_whole SWA_Spring_whole SWA_Summer_whole SWA_Fall_whole T_Winter T_Spring T_Summer T_Fall Tmax_Winter Tmax_Spring Tmax_Summer Tmax_Fall Tmin_Winter Tmin_Spring Tmin_Summer Tmin_Fall Transp_Winter Transp_Spring Transp_Summer Transp_Fall VWC_Winter_top50 VWC_Spring_top50 VWC_Summer_top50 VWC_Fall_top50 VWC_Winter_whole VWC_Spring_whole VWC_Summer_whole VWC_Fall_whole WetSoilDays_Winter_top50 WetSoilDays_Spring_top50 WetSoilDays_Summer_top50 WetSoilDays_Fall_top50 WetSoilDays_Winter_whole WetSoilDays_Spring_whole WetSoilDays_Summer_whole WetSoilDays_Fall_whole PPT_Annual T_Annual RL Location_ID
0 NABR -110.0472 37.60413 Shrubland 1980 Hist 4.5 0 0 84 5 11 7 1764.955 77.03307 6.082058 2.285707 1949.283 -8753.784 4834.13 -0.6636760860 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.7140658366 6.3995308949 1.5598074021 3.3632779979 NaN 24.34 36.16 29.52 NaN 24.34 36.16 29.52 75.0 34.0 0.0 26.0 3.4668806371 2.6546632530 0.0321140671 0.4880867481 3.4668806371 2.6546632530 0.0321140671 0.4880867481 7.7811633032 31.1394527955 48.0177480655 21.9156825756 13.79 8.71 2.69 6.37 36.5000000000 36.5000000000 3.4668806371 2.6546632530 0.0321140671 0.4880867481 3.4668806371 2.6546632530 0.0321140671 0.4880867481 0.96483520 8.767935 23.15924 11.962090 14.15 28.75 37.05 31.15 -12.45 -7.35 5.55 -10.25 0.2370806 5.296833 1.067496 1.9667860 0.1134468701 0.0968307001 0.0418759016 0.0522975530 0.1134468701 0.0968307001 0.0418759016 0.0522975530 91.0 77.0 5.0 47.0 91.0 77.0 5.0 47.0 31.56 11.21352505 54.57202074 1
1 NABR -110.0472 37.60413 Shrubland 1981 Hist 4.5 0 0 84 5 11 7 1764.955 77.03307 6.082058 2.285707 1949.283 -8753.784 4834.13 0.3478010620 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.1815202084 5.9723378265 5.0428776741 4.6374034668 13.92 26.53 36.08 NaN 13.92 26.53 36.08 NaN 79.0 26.0 0.0 13.0 0.3461917264 0.8982752558 0.0336629893 2.5013360811 0.3461917264 0.8982752558 0.0336629893 2.5013360811 8.1229049607 32.3882557036 48.1772426406 21.7575735702 2.25 9.81 9.39 11.75 13.2500000000 13.2500000000 0.3461917264 0.8982752558 0.0336629893 2.5013360811 0.3461917264 0.8982752558 0.0336629893 2.5013360811 3.33444400 10.548370 23.27065 11.581320 17.05 28.15 37.55 29.75 -9.35 -5.55 1.25 -7.25 0.2930753 3.506108 3.916328 2.7875470 0.0493818430 0.0607271763 0.0426386771 0.0936706801 0.0493818430 0.0607271763 0.0426386771 0.0936706801 48.0 60.0 13.0 85.0 48.0 60.0 13.0 85.0 33.20 12.18369600 54.57202074 1
2 NABR -110.0472 37.60413 Shrubland 1982 Hist 4.5 0 0 84 5 11 7 1764.955 77.03307 6.082058 2.285707 1949.283 -8753.784 4834.13 0.3260300992 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.2589947135 4.7173273934 4.5276363327 4.2477717540 NaN 26.19 34.99 22.06 NaN 26.19 34.99 22.06 83.0 21.0 0.0 30.0 3.2599844936 1.5994982052 0.1993822366 1.2432253150 3.2599844936 1.5994982052 0.1993822366 1.2432253150 7.3379526955 31.4894498184 47.1800768757 21.0684231651 4.12 5.10 9.50 9.83 17.2857142857 17.2857142857 3.2599844936 1.5994982052 0.1993822366 1.2432253150 3.2599844936 1.5994982052 0.1993822366 1.2432253150 -0.01555556 9.472283 22.05707 9.869231 14.35 28.45 36.65 31.85 -16.55 -7.25 5.65 -6.25 0.2453347 3.105047 3.523923 2.8900990 0.1092341982 0.0748166564 0.0456102615 0.0677891794 0.1092341982 0.0748166564 0.0456102615 0.0677891794 90.0 62.0 19.0 73.0 90.0 62.0 19.0 73.0 28.55 10.34575711 54.57202074 1
3 NABR -110.0472 37.60413 Shrubland 1983 Hist 4.5 0 0 84 5 11 7 1764.955 77.03307 6.082058 2.285707 1949.283 -8753.784 4834.13 0.0388273872 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.7419915365 6.2671578978 5.1695757094 3.7751048188 NaN 28.56 33.69 31.02 NaN 28.56 33.69 31.02 85.0 32.0 0.0 19.0 3.8064480379 2.9456592119 0.0960442305 1.5835966242 3.8064480379 2.9456592119 0.0960442305 1.5835966242 7.4798456947 30.3128312703 46.5762368398 21.8471460016 7.09 10.80 10.22 10.40 16.7142857143 16.7142857143 3.8064480379 2.9456592119 0.0960442305 1.5835966242 3.8064480379 2.9456592119 0.0960442305 1.5835966242 0.40944440 8.020652 21.32826 11.325820 13.35 30.65 34.55 33.15 -15.05 -7.25 3.85 -8.95 0.2252735 4.962824 5.006576 1.1952350 0.1204177901 0.1025422325 0.0441405046 0.0748017843 0.1204177901 0.1025422325 0.0441405046 0.0748017843 90.0 74.0 15.0 69.0 90.0 74.0 15.0 69.0 38.51 10.27104410 54.57202074 1
4 NABR -110.0472 37.60413 Shrubland 1984 Hist 4.5 0 0 84 5 11 7 1764.955 77.03307 6.082058 2.285707 1949.283 -8753.784 4834.13 0.2166602692 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.6272686835 5.0078604793 5.2303324404 4.0803373430 NaN 30.95 34.01 29.52 NaN 30.95 34.01 29.52 91.0 35.0 0.0 30.0 3.7945975224 1.7555326024 0.0452782946 1.3792575946 3.7945975224 1.7555326024 0.0452782946 1.3792575946 7.1730101555 31.9972417196 47.0386757592 21.0183982059 4.77 4.32 9.49 8.17 16.5000000000 16.5000000000 3.7945975224 1.7555326024 0.0452782946 1.3792575946 3.7945975224 1.7555326024 0.0452782946 1.3792575946 -1.04725300 9.853804 21.95978 10.034070 10.25 32.75 35.35 31.35 -18.45 -8.45 2.95 -12.45 0.1226868 3.120243 4.269040 0.9273169 0.1202091711 0.0778415354 0.0431793330 0.0703661709 0.1202091711 0.0778415354 0.0431793330 0.0703661709 91.0 65.0 16.0 62.0 91.0 65.0 16.0 62.0 26.75 10.20010025 54.57202074 1

All Scenarios

Code
# --- Build the "All Scenarios" dataset -------------------------------------
# Keep every per-scenario row; normalise 'RCP' ('4.5'/'8.5' -> float,
# 'historical' kept as-is) and mirror historical rows into both RCP branches.
numeric_series = pd.to_numeric(df['RCP'], errors='coerce')
df['RCP'] = numeric_series.fillna(df['RCP'])

four = df[df['RCP'].isin([4.5])]
eight = df[df['RCP'].isin([8.5])]
# .copy() so the RCP relabelling writes to a real frame, not a slice view
# (avoids SettingWithCopyWarning / a silently ignored assignment)
four_h = df[df['RCP'].isin(['historical'])].copy()
four_h['RCP'] = 4.5
eight_h = df[df['RCP'].isin(['historical'])].copy()
eight_h['RCP'] = 8.5

df_orig = pd.concat([four_h, four, eight_h, eight], ignore_index=True)
# 1-based ID per unique (long, lat) coordinate pair
df_orig['Location_ID'] = df_orig.groupby(['long', 'lat']).ngroup() + 1

df_orig.head(5)
Park long lat veg year TimePeriod RCP scenario treecanopy Ann_Herb Bare Herb Litter Shrub El Sa Cl RF Slope E S T_P_Corr DrySoilDays_Winter_top50 DrySoilDays_Spring_top50 DrySoilDays_Summer_top50 DrySoilDays_Fall_top50 DrySoilDays_Winter_whole DrySoilDays_Spring_whole DrySoilDays_Summer_whole DrySoilDays_Fall_whole Evap_Winter Evap_Spring Evap_Summer Evap_Fall ExtremeShortTermDryStress_Winter_top50 ExtremeShortTermDryStress_Spring_top50 ExtremeShortTermDryStress_Summer_top50 ExtremeShortTermDryStress_Fall_top50 ExtremeShortTermDryStress_Winter_whole ExtremeShortTermDryStress_Spring_whole ExtremeShortTermDryStress_Summer_whole ExtremeShortTermDryStress_Fall_whole FrostDays_Winter FrostDays_Spring FrostDays_Summer FrostDays_Fall NonDrySWA_Winter_top50 NonDrySWA_Spring_top50 NonDrySWA_Summer_top50 NonDrySWA_Fall_top50 NonDrySWA_Winter_whole NonDrySWA_Spring_whole NonDrySWA_Summer_whole NonDrySWA_Fall_whole PET_Winter PET_Spring PET_Summer PET_Fall PPT_Winter PPT_Spring PPT_Summer PPT_Fall SemiDryDuration_Annual_top50 SemiDryDuration_Annual_whole SWA_Winter_top50 SWA_Spring_top50 SWA_Summer_top50 SWA_Fall_top50 SWA_Winter_whole SWA_Spring_whole SWA_Summer_whole SWA_Fall_whole T_Winter T_Spring T_Summer T_Fall Tmax_Winter Tmax_Spring Tmax_Summer Tmax_Fall Tmin_Winter Tmin_Spring Tmin_Summer Tmin_Fall Transp_Winter Transp_Spring Transp_Summer Transp_Fall VWC_Winter_top50 VWC_Spring_top50 VWC_Summer_top50 VWC_Fall_top50 VWC_Winter_whole VWC_Spring_whole VWC_Summer_whole VWC_Fall_whole WetSoilDays_Winter_top50 WetSoilDays_Spring_top50 WetSoilDays_Summer_top50 WetSoilDays_Fall_top50 WetSoilDays_Winter_whole WetSoilDays_Spring_whole WetSoilDays_Summer_whole WetSoilDays_Fall_whole PPT_Annual T_Annual RL Location_ID
0 NABR -110.0472 37.60413 Shrubland 1980 Hist 4.5 sc1 0 0 84 5 11 7 1764.955 77.03307 6.082058 2.285707 1949.283 -8753.784 4834.13 -0.6636760860 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.7140658366 6.3995308949 1.5598074021 3.3632779979 NaN 24.34 36.16 29.52 NaN 24.34 36.16 29.52 75.0 34.0 0.0 26.0 3.4668806371 2.6546632530 0.0321140671 0.4880867481 3.4668806371 2.6546632530 0.0321140671 0.4880867481 7.7811633032 31.1394527955 48.0177480655 21.9156825756 13.79 8.71 2.69 6.37 36.5000000000 36.5000000000 3.4668806371 2.6546632530 0.0321140671 0.4880867481 3.4668806371 2.6546632530 0.0321140671 0.4880867481 0.96483520 8.767935 23.15924 11.962090 14.15 28.75 37.05 31.15 -12.45 -7.35 5.55 -10.25 0.2370806 5.296833 1.067496 1.9667860 0.1134468701 0.0968307001 0.0418759016 0.0522975530 0.1134468701 0.0968307001 0.0418759016 0.0522975530 91.0 77.0 5.0 47.0 91.0 77.0 5.0 47.0 31.56 11.21352505 54.57202074 1
1 NABR -110.0472 37.60413 Shrubland 1981 Hist 4.5 sc1 0 0 84 5 11 7 1764.955 77.03307 6.082058 2.285707 1949.283 -8753.784 4834.13 0.3478010620 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.1815202084 5.9723378265 5.0428776741 4.6374034668 13.92 26.53 36.08 NaN 13.92 26.53 36.08 NaN 79.0 26.0 0.0 13.0 0.3461917264 0.8982752558 0.0336629893 2.5013360811 0.3461917264 0.8982752558 0.0336629893 2.5013360811 8.1229049607 32.3882557036 48.1772426406 21.7575735702 2.25 9.81 9.39 11.75 13.2500000000 13.2500000000 0.3461917264 0.8982752558 0.0336629893 2.5013360811 0.3461917264 0.8982752558 0.0336629893 2.5013360811 3.33444400 10.548370 23.27065 11.581320 17.05 28.15 37.55 29.75 -9.35 -5.55 1.25 -7.25 0.2930753 3.506108 3.916328 2.7875470 0.0493818430 0.0607271763 0.0426386771 0.0936706801 0.0493818430 0.0607271763 0.0426386771 0.0936706801 48.0 60.0 13.0 85.0 48.0 60.0 13.0 85.0 33.20 12.18369600 54.57202074 1
2 NABR -110.0472 37.60413 Shrubland 1982 Hist 4.5 sc1 0 0 84 5 11 7 1764.955 77.03307 6.082058 2.285707 1949.283 -8753.784 4834.13 0.3260300992 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.2589947135 4.7173273934 4.5276363327 4.2477717540 NaN 26.19 34.99 22.06 NaN 26.19 34.99 22.06 83.0 21.0 0.0 30.0 3.2599844936 1.5994982052 0.1993822366 1.2432253150 3.2599844936 1.5994982052 0.1993822366 1.2432253150 7.3379526955 31.4894498184 47.1800768757 21.0684231651 4.12 5.10 9.50 9.83 17.2857142857 17.2857142857 3.2599844936 1.5994982052 0.1993822366 1.2432253150 3.2599844936 1.5994982052 0.1993822366 1.2432253150 -0.01555556 9.472283 22.05707 9.869231 14.35 28.45 36.65 31.85 -16.55 -7.25 5.65 -6.25 0.2453347 3.105047 3.523923 2.8900990 0.1092341982 0.0748166564 0.0456102615 0.0677891794 0.1092341982 0.0748166564 0.0456102615 0.0677891794 90.0 62.0 19.0 73.0 90.0 62.0 19.0 73.0 28.55 10.34575711 54.57202074 1
3 NABR -110.0472 37.60413 Shrubland 1983 Hist 4.5 sc1 0 0 84 5 11 7 1764.955 77.03307 6.082058 2.285707 1949.283 -8753.784 4834.13 0.0388273872 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.7419915365 6.2671578978 5.1695757094 3.7751048188 NaN 28.56 33.69 31.02 NaN 28.56 33.69 31.02 85.0 32.0 0.0 19.0 3.8064480379 2.9456592119 0.0960442305 1.5835966242 3.8064480379 2.9456592119 0.0960442305 1.5835966242 7.4798456947 30.3128312703 46.5762368398 21.8471460016 7.09 10.80 10.22 10.40 16.7142857143 16.7142857143 3.8064480379 2.9456592119 0.0960442305 1.5835966242 3.8064480379 2.9456592119 0.0960442305 1.5835966242 0.40944440 8.020652 21.32826 11.325820 13.35 30.65 34.55 33.15 -15.05 -7.25 3.85 -8.95 0.2252735 4.962824 5.006576 1.1952350 0.1204177901 0.1025422325 0.0441405046 0.0748017843 0.1204177901 0.1025422325 0.0441405046 0.0748017843 90.0 74.0 15.0 69.0 90.0 74.0 15.0 69.0 38.51 10.27104410 54.57202074 1
4 NABR -110.0472 37.60413 Shrubland 1984 Hist 4.5 sc1 0 0 84 5 11 7 1764.955 77.03307 6.082058 2.285707 1949.283 -8753.784 4834.13 0.2166602692 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.6272686835 5.0078604793 5.2303324404 4.0803373430 NaN 30.95 34.01 29.52 NaN 30.95 34.01 29.52 91.0 35.0 0.0 30.0 3.7945975224 1.7555326024 0.0452782946 1.3792575946 3.7945975224 1.7555326024 0.0452782946 1.3792575946 7.1730101555 31.9972417196 47.0386757592 21.0183982059 4.77 4.32 9.49 8.17 16.5000000000 16.5000000000 3.7945975224 1.7555326024 0.0452782946 1.3792575946 3.7945975224 1.7555326024 0.0452782946 1.3792575946 -1.04725300 9.853804 21.95978 10.034070 10.25 32.75 35.35 31.35 -18.45 -8.45 2.95 -12.45 0.1226868 3.120243 4.269040 0.9273169 0.1202091711 0.0778415354 0.0431793330 0.0703661709 0.1202091711 0.0778415354 0.0431793330 0.0703661709 91.0 65.0 16.0 62.0 91.0 65.0 16.0 62.0 26.75 10.20010025 54.57202074 1

Data Exploration

Basic Statistics

Basic Statistics
- 79 years of prediction (2021~2099)
- 40 scenarios (sc22~sc61)
- 2 RCP scenarios (4.5, 8.5)
- 113 locations

Explanation
- The data is collected over 113 locations within the Natural Bridge National Monument. (Number of Unique latitude, longitude combinations)
- This dataset is composed of 41 years of historical data and 79 years worth of predictions. Since there can be only one scenario for past data, all historical data is labeled as ‘sc1’ or scenario one
- For the predicted years (2021 to 2099), there are two RCP scenarios, which change only the RCP variable, and 40 scenarios which simulate 86 other variables.

Based on each combination of scenarios, a prediction is made for each location point regarding various columns of the dataset — including annual and seasonal precipitation, seasonal dry soil days, seasonal evaporation, seasonal extreme short-term dry stress, and soil water availability — to output a final prediction for annual and seasonal temperatures.

What is RCP?

Representative Concentration Pathways: a group of scenarios in which CO2 emissions are predicted, as in the image below
- The dataset consists of two RCP scenarios 4.5 and 8.5

source : Representative Concentration Pathway. (2024, May 2). In Wikipedia. https://en.wikipedia.org/wiki/Representative_Concentration_Pathway

Location

Where is this data located and how does it look like?

The data points were sampled at the Natural Bridges National Monument in Utah. The plots below show two different location aspects of the dataset. The first plot shows the average annual temperature for each location point in the year 2099. Since predicted temperatures increase over time, the last year of the dataset was chosen for a more dramatic comparison.

The second plot is a scatter plot of the locations of vegetations. By comparing the two graphs, we can tell that there isn’t much correlation with vegetation and annual temperature but rather a correlation with the location(latitude, longitude) and temperature. We will get to this in the following visualizations.

Map Visualizations
# Mean annual temperature per coordinate for the final prediction year (2099).
# Renamed from `map` to avoid shadowing the builtin.
avg_temp_2099 = df_con[df_con['year']==2099].groupby(['long','lat'])['T_Annual'].mean().reset_index()

filtered_df = avg_temp_2099
# One marker per location; both colour and size encode mean annual temperature
fig = px.scatter_mapbox(filtered_df, lat="lat", lon="long", color="T_Annual", size="T_Annual",
                  color_continuous_scale=px.colors.cyclical.IceFire, size_max=8, zoom=11,
                  mapbox_style="open-street-map")

fig.update_layout(
    title={
        'text': "<b>Average Temperature (2099) </b>",
        'y': 0.97,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top'
    },
    margin={"r": 0, "t": 40, "l": 0, "b": 0}
    )

fig.show()

# One row per (long, lat, veg) combination present in 2099 — used only for
# marker positions and vegetation colouring. Renamed from `map` to avoid
# shadowing the builtin.
veg_points = df_con[df_con['year']==2099].groupby(['long','lat','veg']).size().reset_index()

filtered_df = veg_points

# Create the scatter mapbox (color="veg" is categorical, so plotly draws a
# discrete legend rather than a continuous colorbar)
fig = px.scatter_mapbox(filtered_df, lat="lat", lon="long", color="veg",
                        color_continuous_scale=px.colors.cyclical.IceFire, size_max=8, zoom=11,
                        mapbox_style="open-street-map")

# Update the layout with the legend title and position. The title is set on
# `legend` (not `coloraxis_colorbar`) because a discrete colour axis has no
# colorbar — the previous colorbar title was never displayed.
fig.update_layout(
    title={
        'text': "<b>Vegetation Location</b>",
        'y': 0.97,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top'
    },
    legend={
        'title': 'Vegetation Level',
        'x': 1,  # Position the legend to the right
        'y': 0.8,  # Center the legend vertically
        'xanchor': 'left',  # Anchor the legend's x position to the left side
        'yanchor': 'middle'  # Anchor the legend's y position to the middle
    },
    margin={"r": 0, "t": 40, "l": 0, "b": 0}
)
fig.update_traces(marker=dict(size=10))  # Fixed marker size for all points

# Show the figure
fig.show()

Perspectives to Consider

What are some aspects of the datasets we can slice and dice or drill down to compare and retrieve meaningful insights?

Most numerical features are generated based on the scenario of the model that generated future data, and some numerical features such as S, E, Slope, RF, Cl, Sa, El, treecanopy, etc. are fixed according to a unique location. Therefore, the categorical variables are the aspects of the dataset we can compare to create new insights.

Categorical Variables : - RCP - Vegetation - Scenario

The following plots compare the predicted annual temperature for each category for the three categorical variables

Temperature RCP comparison
# Mean annual temperature per year for each RCP branch. Historical rows were
# duplicated into both branches upstream, so each line spans the full range.
filtered_data = df_con.groupby(['year','RCP'])['T_Annual'].mean().reset_index()

# Create the line plot — one smoothed line per RCP value
fig = px.line(
    data_frame=filtered_data,
    x='year',
    y='T_Annual',
    color='RCP',
    title='<b>Annual Temperature by RCP Type</b>',
    labels={'T_Annual': 'Annual Temperature'},
    line_shape='spline'
)
fig.update_layout(title={'x':0.5})

# Full-height dashed marker where predictions begin. add_vline always spans
# the whole plot area; the previous manual bounds (min/1.1 .. max*1.1) shrink
# instead of extend when the data minimum is negative.
fig.add_vline(x=2021, line_width=2, line_dash="dash", line_color="Red")

# Label the prediction boundary near the top of the data range
fig.add_annotation(
    dict(
        x=2021,
        y=filtered_data['T_Annual'].max(),
        xref="x",
        yref="y",
        text="Prediction",
        showarrow=False,
        font=dict(
            size=12,
            color="Red"
        ),
        align="center",
        xanchor="left"
    )
)

# Show the plot
fig.show()

Since RCP deals with CO2 emission and the 8.5 scenario has a higher emission prediction than the 4.5 scenario, the annual temperature increase of rcp8.5 is more rapid than rcp4.5 although both are increasing.

Temperature comparison (Vegetation)
# Filter the DataFrame for 'RCP' values 'historical' and 4.5
filtered_data = df_con[df_con['RCP'].isin(['historical', 4.5])].groupby(['year','veg'])['T_Annual'].mean().reset_index()

# Create the line plot
fig = px.line(
    data_frame=filtered_data,
    x='year',
    y='T_Annual',
    color='veg',  # This will create lines for each unique value in 'veg' and color them differently
    title='<b>Annual Temperature by Vegetation Type</b>',
    labels={'T_Annual': 'Annual Temperature'}
)
fig.update_layout(title={'x':0.5})


# Add a vertical line at year 2021
fig.add_shape(
    dict(
        type='line',
        x0=2021,
        y0=filtered_data['T_Annual'].min()/1.1,
        x1=2021,
        y1=filtered_data['T_Annual'].max()*1.1,
        line=dict(
            color="Red",
            width=2,
            dash="dash",
        ),
    )
)

fig.add_annotation(
    dict(
        x=2021,  # Position the text to the right of the line
        y=filtered_data['T_Annual'].max(),  # Position the text at the middle of the y-axis
        xref="x",
        yref="y",
        text="Prediction",
        showarrow=False,
        font=dict(
            size=12,
            color="Red"
        ),
        align="center",
        xanchor="left"
    )
)

# Show the plot
fig.show()

The vegetation types seem to follow exactly the same pattern of prediction, with a fixed difference between each other. This may mean that when calculating predictions based on scenarios, the algorithm was modeled so that the means of the vegetation types were always a given distance apart. Because of this limitation of the algorithm, it is unnecessary to compare vegetation types with each other — we will always get the same difference.

Temperature comparison (scenario)
# Assuming df_orig is your DataFrame and it has been filtered to exclude 'Hist' from 'TimePeriod'
df_filtered = df_orig[df_orig['TimePeriod'] != 'Hist']

# Calculate the median of 'T_Annual' for each 'scenario'
medians = df_filtered.groupby('scenario')['T_Annual'].median().reset_index()

# Sort the median values
medians = medians.sort_values('T_Annual')

# Merge sorted median DataFrame back to the filtered DataFrame
df_sorted = pd.merge(medians['scenario'], df_filtered, on='scenario', how='left')

# Creating a boxplot with Plotly Express using the sorted DataFrame
fig = px.box(df_sorted, x='scenario', y='T_Annual', color='RCP')

# Rotating x-axis labels
fig.update_layout(
    xaxis_tickangle=-90,
    title={
        'text': "<b>Annual Temperature by Scenario</b>",
        'x':0.5,
        'xanchor': 'center'
    }
)
# Displaying the plot
fig.show()

Since we already know that RCP plays a big role in how the algorithm predicts the temperature, we will divide the scenarios into 4.5 scenarios and 8.5 scenarios to see if there is a significant difference. By examining the plot, we now know that RCP 4.5 corresponds to scenarios 22–41 and RCP 8.5 corresponds to scenarios 42–61. There are cases where 4.5 scenarios had higher temperatures than 8.5 scenarios, but since RCP acts as the first drill-down layer of the dataset, we can use the scenario as the second drill-down of the dataset.

For our dataset analysis, we will be comparing the maximum and minimum scenario for each RCP group to analyze what features affect temperature the most. That is comparing scenario 37 to scenario 40 for RCP 4.5 scenarios, and comparing scenario 58 to scenario 60 to do the same for RCP 8.5.

Statistical Significance

Is there a significant difference between different scenarios?

Before we start analyzing our dataset, one final step we want to take is proving the statistical significance in the different scenarios we plan on comparing.

The three comparisons we plan on making are as follows: 1. RCP 8.5 (High) vs RCP 4.5 (Low) 2. RCP 4.5: Scenario 37 (High) vs Scenario 40 (Low) 3. RCP 8.5: Scenario 60 (High) vs Scenario 58 (Low)

T-test for RCP 4.5 and 8.5
# Annual temperatures for the two RCP branches, future rows only
rcp_high = df_orig[(df_orig['RCP'] == 8.5) & (df_orig['TimePeriod'] != 'Hist')]['T_Annual']
rcp_low = df_orig[(df_orig['RCP'] == 4.5) & (df_orig['TimePeriod'] != 'Hist')]['T_Annual']

# Two-sample t-test (correction=True applies Welch's unequal-variance form)
result = pg.ttest(rcp_high, rcp_low, correction=True)

T-test for RCP 4.5 and 8.5

Result Value
t-value 232.998
95% Confidence Interval [1.25 1.27]
p-Value 0.000
T-test for Scenario 40 vs 37
# Annual temperatures for the lowest- and highest-median RCP 4.5 scenarios
sc40_temps = df_orig[df_orig['scenario'] == 'sc40']['T_Annual']
sc37_temps = df_orig[df_orig['scenario'] == 'sc37']['T_Annual']

# Two-sample t-test (correction=True applies Welch's unequal-variance form)
result = pg.ttest(sc40_temps, sc37_temps, correction=True)

T-test for Scenario 40 vs 37

Result Value
t-value -157.977
95% Confidence Interval [-2.51 -2.45]
p-Value 0.000
T-test for Scenario 60 vs 58
# Annual temperatures for the highest- and lowest-median RCP 8.5 scenarios
sc60_temps = df_orig[df_orig['scenario'] == 'sc60']['T_Annual']
sc58_temps = df_orig[df_orig['scenario'] == 'sc58']['T_Annual']

# Two-sample t-test (correction=True applies Welch's unequal-variance form)
result = pg.ttest(sc60_temps, sc58_temps, correction=True)

T-test for Scenario 60 vs 58

Result Value
t-value -125.742
95% Confidence Interval [-3.61 -3.5 ]
p-Value 0.000

Analysis (RCP = 4.5)

PCA

Code
import plotly.graph_objects as go
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import plotly.express as px

# Late-century (2095-2099) RCP 4.5 rows; drop any column containing NaN so
# PCA / k-means receive a complete numeric matrix.
data = df_orig[(df_orig['RCP']==4.5) & (df_orig['year'].isin(range(2095,2100)))].dropna(axis=1, how='any')
# Feature columns selected by position: long/lat, year, RCP, then the
# simulated climate variables; the trailing ID/label columns are excluded.
X = data.iloc[:,list(range(1, 3))+ [4,6] + list(range(8, len(data.columns)-3))]

# Standardize the features (zero mean, unit variance) before PCA
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Apply PCA — keep 10 components; the first 2-3 are used for the scatter
# plots below (previous comment incorrectly claimed 2 components)
pca = PCA(n_components=10)
X_pca = pca.fit_transform(X_scaled)

# Apply k-means clustering in PCA space
kmeans = KMeans(n_clusters=2, random_state=0)
clusters = kmeans.fit_predict(X_pca)

# Add the PCA coordinates and cluster labels to the DataFrame (the cluster
# labels were previously computed but never stored)
data['PCA1'] = X_pca[:, 0]
data['PCA2'] = X_pca[:, 1]
data['PCA3'] = X_pca[:, 2]
data['Cluster'] = clusters

# Component loadings: one row per feature, one column per component
loadings = pca.components_.T
columns = X.columns

loadings_df = pd.DataFrame(loadings, columns=['PCA1', 'PCA2', 'PCA3', 'PCA4', 'PCA5', 'PCA6', 'PCA7', 'PCA8', 'PCA9', 'PCA10'], index=columns)

# Explained variance per component, plus its running total
explained_variance_ratio = pca.explained_variance_ratio_
cumulative_explained_variance = np.cumsum(explained_variance_ratio)

# x-axis labels PCA1..PCA10
x_labels = [f'PCA{i+1}' for i in range(len(explained_variance_ratio))]

# Scree plot: explained variance ratio by component
fig = go.Figure(data=go.Scatter(
    x=x_labels,
    y=explained_variance_ratio,
    mode='lines+markers',
    text=explained_variance_ratio,
    textposition='top center'
))

fig.update_layout(
    title='<b>Explained Variance Ratio by Principal Components</b>',
    xaxis_title='Principal Components',
    yaxis_title='Explained Variance Ratio',
    yaxis=dict(tickformat=".2%", range=[0, 1.1 * explained_variance_ratio.max()]),
    template='plotly_white',
    margin=dict(l=50, r=50, b=50, t=50)
)

fig.show()
Code
# 2D PCA projection, one colour per scenario
fig = px.scatter(
    data,
    x='PCA1',
    y='PCA2',
    color='scenario',
    title='<b>PCA and k-means Clustering</b>',
    labels={'PCA1': 'PCA1', 'PCA2': 'PCA2'},
    opacity=0.5
)

# Tighten margins and centre the title
fig.update_layout(
    margin=dict(l=10, r=10, b=10, t=40),
    title_x=0.5,
    title_y=0.95,
)

fig.show()
Code
# 3D PCA projection, one colour per scenario. The previous size_max=0.1 was
# a no-op: size_max only applies when a `size=` column is given, and the
# marker size is set explicitly via update_traces below.
fig = px.scatter_3d(
    data,
    x='PCA1',
    y='PCA2',
    z='PCA3',
    color='scenario',
    title='<b>3D PCA with Scenario</b>',
    labels={'PCA1': 'PCA1', 'PCA2': 'PCA2', 'PCA3': 'PCA3'},
    opacity=0.5
)

fig.update_traces(marker=dict(size=3))  # small markers keep the cloud readable


# Update layout to adjust padding, centre the title, and label the 3D axes
fig.update_layout(
    margin=dict(l=5, r=5, b=5, t=20),
    title_x=0.5,
    title_y=0.95,
    scene=dict(
        xaxis=dict(title='PCA1'),
        yaxis=dict(title='PCA2'),
        zaxis=dict(title='PCA3')
    )
)
fig.show()
Code
# Number of highest-|loading| features to display. The original block mixed
# three counts (comment: 10, code: 30, title: 20); a single constant keeps
# the code, comments, and title consistent.
TOP_N = 30

# Rank features by the absolute value of their PCA1 loading
sorted_loadings = loadings_df['PCA1'].abs().sort_values(ascending=False)
top_features = sorted_loadings.head(TOP_N).index

# Signed loadings for those top features, rounded for display
top_loadings = round(loadings_df.loc[top_features, 'PCA1'],4)

# Blue bars = positive loading, red = negative (bar height shows magnitude)
colors = ['blue' if val > 0 else 'red' for val in top_loadings]

fig = go.Figure(data=[go.Bar(
    x=top_loadings.index,
    y=top_loadings.abs(),
    text=top_loadings.values,  # show the signed values as bar text
    textposition='inside',
    marker_color=colors,
    showlegend=False
)])

# Invisible dummy bars provide legend entries for the two colours
fig.add_trace(go.Bar(
    x=[None], y=[None],
    marker=dict(color='blue'),
    showlegend=True,
    name='Positive'
))
fig.add_trace(go.Bar(
    x=[None], y=[None],
    marker=dict(color='red'),
    showlegend=True,
    name='Negative'
))

# Update layout for better readability; title now reflects the real count
fig.update_layout(
    title=f'<b>Top {TOP_N} Most Influential Features on PCA1</b>',
    xaxis_title='Features',
    yaxis_title='Absolute PCA1 Loadings',
    yaxis=dict(tickformat=".2f"),
    template='plotly_white',
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=1.02,
        xanchor="right",
        x=1
    ),
    margin=dict(l=20, r=20, b=20, t=60)
)

fig.show()
Code
# Number of highest-|loading| features to display. The original block mixed
# three counts (comment: 10, code: 30, title: 20); a single constant keeps
# the code, comments, and title consistent.
TOP_N = 30

# Rank features by the absolute value of their PCA2 loading
sorted_loadings = loadings_df['PCA2'].abs().sort_values(ascending=False)
top_features = sorted_loadings.head(TOP_N).index

# Signed loadings for those top features, rounded for display
top_loadings = round(loadings_df.loc[top_features, 'PCA2'],4)

# Blue bars = positive loading, red = negative (bar height shows magnitude)
colors = ['blue' if val > 0 else 'red' for val in top_loadings]

fig = go.Figure(data=[go.Bar(
    x=top_loadings.index,
    y=top_loadings.abs(),
    text=top_loadings.values,  # show the signed values as bar text
    textposition='inside',
    marker_color=colors,
    showlegend=False
)])

# Invisible dummy bars provide legend entries for the two colours
fig.add_trace(go.Bar(
    x=[None], y=[None],
    marker=dict(color='blue'),
    showlegend=True,
    name='Positive'
))
fig.add_trace(go.Bar(
    x=[None], y=[None],
    marker=dict(color='red'),
    showlegend=True,
    name='Negative'
))

# Update layout for better readability; title reflects the real count and the
# y-axis now correctly says PCA2 (was a PCA1 copy-paste leftover)
fig.update_layout(
    title=f'<b>Top {TOP_N} Most Influential Features on PCA2</b>',
    xaxis_title='Features',
    yaxis_title='Absolute PCA2 Loadings',
    yaxis=dict(tickformat=".2f"),
    template='plotly_white',
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=1.02,
        xanchor="right",
        x=1
    ),
    margin=dict(l=20, r=20, b=20, t=60)
)

fig.show()
Code
# Rank features by the absolute value of their PCA3 loading
sorted_loadings = loadings_df['PCA3'].abs().sort_values(ascending=False)

# Keep the 30 most influential features (comment/title previously said 10/20)
top_features = sorted_loadings.head(30).index

# Signed loadings (rounded to 4 decimals) for the selected features
top_loadings = round(loadings_df.loc[top_features, 'PCA3'], 4)

# Blue = positive loading, red = negative loading
colors = ['blue' if val > 0 else 'red' for val in top_loadings]

# Bar heights are the absolute loadings; bar text shows the signed value
fig = go.Figure(data=[go.Bar(
    x=top_loadings.index,
    y=top_loadings.abs(),
    text=top_loadings.values,
    textposition='inside',
    marker_color=colors,
    showlegend=False
)])

# Invisible dummy traces so the blue/red color coding gets a legend entry
fig.add_trace(go.Bar(
    x=[None], y=[None],
    marker=dict(color='blue'),
    showlegend=True,
    name='Positive'
))
fig.add_trace(go.Bar(
    x=[None], y=[None],
    marker=dict(color='red'),
    showlegend=True,
    name='Negative'
))

# Layout; y-axis label fixed (was "Absolute PCA1 Loadings" on a PCA3 chart)
fig.update_layout(
    title='<b>Top 30 Most Influential Features on PCA3</b>',
    xaxis_title='Features',
    yaxis_title='Absolute PCA3 Loadings',
    yaxis=dict(tickformat=".2f"),
    template='plotly_white',
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=1.02,
        xanchor="right",
        x=1
    ),
    margin=dict(l=20, r=20, b=20, t=60)
)

fig.show()

Write about how the scenarios differ based on PCA

Code
# Correlation of the numeric metric columns (position 8 onward),
# restricted to future (non-historical) RCP 4.5 records
corr_matrix = df_orig[(df_orig['TimePeriod'] != 'Hist') & (df_orig['RCP'] == 4.5)].iloc[:, 8:].corr()

# Interactive heatmap on a reversed red-blue diverging scale
fig = px.imshow(
    corr_matrix,
    labels=dict(color="Correlation"),
    x=corr_matrix.columns,
    y=corr_matrix.columns,
    color_continuous_scale='RdBu_r',
)

# Square canvas, tight margins, centered bold title
fig.update_layout(
    width=900,
    height=900,
    margin=dict(l=10, r=1, t=50, b=10),
    title_text='<b>Future Correlation Heatmap : RCP 4.5</b>',
    title_x=0.5,
)

# Smaller tick labels so all column names fit along both axes
fig.update_xaxes(tickfont=dict(size=10))
fig.update_yaxes(tickfont=dict(size=10))
fig.show()

RMSE

Another method I chose to see which values were most different between two scenarios was using the RMSE to find the columns with the greatest difference, since each scenario has the same number of data points. The data points were standardized before the RMSE calculation.

Code
# The two GCM scenarios being compared
sc60 = df_orig[df_orig['scenario'] == 'sc60']
sc58 = df_orig[df_orig['scenario'] == 'sc58']

# Numeric metric columns only (drop the leading id columns and trailing 3)
df1 = sc60.iloc[:, 8:-3]
df2 = sc58.iloc[:, 8:-3]

def standardize(df):
    """Return column-wise z-scores so the metrics are unit-comparable."""
    return df.apply(zscore)

# Standardize both scenarios independently
z_df1 = standardize(df1)
z_df2 = standardize(df2)

# Per-column RMSE between the standardized scenarios; rows are aligned
# positionally (both scenarios have the same number of points), hence the
# reset_index before subtracting.
rmse = np.sqrt(np.mean((z_df1.reset_index(drop=True) - z_df2.reset_index(drop=True)) ** 2, axis=0))

# The 30 metrics where the two scenarios disagree the most
rmse_sort = rmse.sort_values(ascending=False).head(30)

# Bar chart of the top-disagreement metrics
fig = go.Figure(data=[go.Bar(
    x=rmse_sort.keys(),
    y=rmse_sort.values,
    text=[round(i, 4) for i in rmse_sort.values],  # annotate bars with values
    textposition='inside',
    showlegend=False
)])

# Layout; y-axis label fixed (was mislabeled "Absolute PCA1 Loadings")
fig.update_layout(
    title='<b>Scenario 58 vs 60 RMSE of components</b>',
    xaxis_title='Features',
    yaxis_title='RMSE (z-scored)',
    yaxis=dict(tickformat=".2f"),
    template='plotly_white',
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=1.02,
        xanchor="right",
        x=1
    ),
    margin=dict(l=20, r=20, b=20, t=60)
)

fig.show()

RCP = 8.5 (Analysis)

Code
import plotly.graph_objects as go
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import plotly.express as px

# End-of-century (2095-2099) RCP 8.5 records; drop any column containing NaN
data = df_orig[(df_orig['RCP']==8.5) & (df_orig['year'].isin(range(2095,2100)))].dropna(axis=1, how='any')
# Feature matrix: long/lat (cols 1-2), year (4), RCP (6), and all metric
# columns except the last three
X = data.iloc[:,list(range(1, 3))+ [4,6] + list(range(8, len(data.columns)-3))]

# Standardize the features so PCA is not dominated by large-scale columns
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Apply PCA, keeping 10 components (the first 3 are used for visualization)
# NOTE: the old comment claimed "2 components" — the code keeps 10.
n_components = 10
pca = PCA(n_components=n_components)
X_pca = pca.fit_transform(X_scaled)

# k-means on the PCA scores (cluster labels computed for reference)
kmeans = KMeans(n_clusters=2, random_state=0)
clusters = kmeans.fit_predict(X_pca)

# Attach the first three PCA scores for the scatter plots below
data['PCA1'] = X_pca[:, 0]
data['PCA2'] = X_pca[:, 1]
data['PCA3'] = X_pca[:, 2]

# Component loadings: rows = original features, columns = components
loadings = pca.components_.T
columns = X.columns

# Column names generalized to track n_components instead of a hard-coded list
loadings_df = pd.DataFrame(
    loadings,
    columns=[f'PCA{i+1}' for i in range(n_components)],
    index=columns,
)

# Per-component explained variance and its running total (scree data)
explained_variance_ratio = pca.explained_variance_ratio_
cumulative_explained_variance = np.cumsum(explained_variance_ratio)

# x-axis labels PCA1..PCAn
x_labels = [f'PCA{i+1}' for i in range(len(explained_variance_ratio))]

# Line chart of the explained variance ratio per component
fig = go.Figure(data=go.Scatter(
    x=x_labels,
    y=explained_variance_ratio,
    mode='lines+markers',
    text=explained_variance_ratio,
    textposition='top center'
))

# Layout: percent ticks, headroom above the largest ratio
fig.update_layout(
    title='<b>Explained Variance Ratio by Principal Components</b>',
    xaxis_title='Principal Components',
    yaxis_title='Explained Variance Ratio',
    yaxis=dict(tickformat=".2%", range=[0, 1.1 * explained_variance_ratio.max()]),
    template='plotly_white',
    margin=dict(l=50, r=50, b=50, t=50)
)

fig.show()
Code
# 2-D projection of the PCA scores, points colored by GCM scenario
fig = px.scatter(
    data,
    x='PCA1',
    y='PCA2',
    color='scenario',
    opacity=0.5,
    labels={'PCA1': 'PCA1', 'PCA2': 'PCA2'},
    title='<b>PCA and k-means Clustering</b>',
)

# Center the title and trim the surrounding whitespace
fig.update_layout(
    title_x=0.5,
    title_y=0.95,
    margin=dict(l=10, r=10, b=10, t=40),
)

fig.show()
Code
# 3-D scatter of the first three PCA scores, colored by scenario
fig = px.scatter_3d(
    data,
    x='PCA1',
    y='PCA2',
    z='PCA3',
    color='scenario',
    opacity=0.5,
    size_max=0.1,
    labels={'PCA1': 'PCA1', 'PCA2': 'PCA2', 'PCA3': 'PCA3'},
    title='<b>3D PCA with Scenario</b>',
)

# Uniform small markers keep the dense cloud legible
fig.update_traces(marker=dict(size=3))

# Tight margins, centered title, explicit 3-D axis titles
fig.update_layout(
    title_x=0.5,
    title_y=0.95,
    margin=dict(l=5, r=5, b=5, t=20),
    scene=dict(
        xaxis=dict(title='PCA1'),
        yaxis=dict(title='PCA2'),
        zaxis=dict(title='PCA3'),
    ),
)
fig.show()
Code
# Rank features by the absolute value of their PCA1 loading
sorted_loadings = loadings_df['PCA1'].abs().sort_values(ascending=False)

# Keep the 30 most influential features (comment/title previously said 10/20)
top_features = sorted_loadings.head(30).index

# Signed loadings (rounded to 4 decimals) for the selected features
top_loadings = round(loadings_df.loc[top_features, 'PCA1'], 4)

# Blue = positive loading, red = negative loading
colors = ['blue' if val > 0 else 'red' for val in top_loadings]

# Bar heights are the absolute loadings; bar text shows the signed value
fig = go.Figure(data=[go.Bar(
    x=top_loadings.index,
    y=top_loadings.abs(),
    text=top_loadings.values,
    textposition='inside',
    marker_color=colors,
    showlegend=False
)])

# Invisible dummy traces so the blue/red color coding gets a legend entry
fig.add_trace(go.Bar(
    x=[None], y=[None],
    marker=dict(color='blue'),
    showlegend=True,
    name='Positive'
))
fig.add_trace(go.Bar(
    x=[None], y=[None],
    marker=dict(color='red'),
    showlegend=True,
    name='Negative'
))

# Layout; title now matches the number of features actually plotted (30)
fig.update_layout(
    title='<b>Top 30 Most Influential Features on PCA1</b>',
    xaxis_title='Features',
    yaxis_title='Absolute PCA1 Loadings',
    yaxis=dict(tickformat=".2f"),
    template='plotly_white',
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=1.02,
        xanchor="right",
        x=1
    ),
    margin=dict(l=20, r=20, b=20, t=60)
)

fig.show()
Code
# Rank features by the absolute value of their PCA2 loading
sorted_loadings = loadings_df['PCA2'].abs().sort_values(ascending=False)

# Keep the 30 most influential features (comment/title previously said 10/20)
top_features = sorted_loadings.head(30).index

# Signed loadings (rounded to 4 decimals) for the selected features
top_loadings = round(loadings_df.loc[top_features, 'PCA2'], 4)

# Blue = positive loading, red = negative loading
colors = ['blue' if val > 0 else 'red' for val in top_loadings]

# Bar heights are the absolute loadings; bar text shows the signed value
fig = go.Figure(data=[go.Bar(
    x=top_loadings.index,
    y=top_loadings.abs(),
    text=top_loadings.values,
    textposition='inside',
    marker_color=colors,
    showlegend=False
)])

# Invisible dummy traces so the blue/red color coding gets a legend entry
fig.add_trace(go.Bar(
    x=[None], y=[None],
    marker=dict(color='blue'),
    showlegend=True,
    name='Positive'
))
fig.add_trace(go.Bar(
    x=[None], y=[None],
    marker=dict(color='red'),
    showlegend=True,
    name='Negative'
))

# Layout; y-axis label fixed (was "Absolute PCA1 Loadings" on a PCA2 chart)
fig.update_layout(
    title='<b>Top 30 Most Influential Features on PCA2</b>',
    xaxis_title='Features',
    yaxis_title='Absolute PCA2 Loadings',
    yaxis=dict(tickformat=".2f"),
    template='plotly_white',
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=1.02,
        xanchor="right",
        x=1
    ),
    margin=dict(l=20, r=20, b=20, t=60)
)

fig.show()
Code
# Rank features by the absolute value of their PCA3 loading
sorted_loadings = loadings_df['PCA3'].abs().sort_values(ascending=False)

# Keep the 30 most influential features (comment/title previously said 10/20)
top_features = sorted_loadings.head(30).index

# Signed loadings (rounded to 4 decimals) for the selected features
top_loadings = round(loadings_df.loc[top_features, 'PCA3'], 4)

# Blue = positive loading, red = negative loading
colors = ['blue' if val > 0 else 'red' for val in top_loadings]

# Bar heights are the absolute loadings; bar text shows the signed value
fig = go.Figure(data=[go.Bar(
    x=top_loadings.index,
    y=top_loadings.abs(),
    text=top_loadings.values,
    textposition='inside',
    marker_color=colors,
    showlegend=False
)])

# Invisible dummy traces so the blue/red color coding gets a legend entry
fig.add_trace(go.Bar(
    x=[None], y=[None],
    marker=dict(color='blue'),
    showlegend=True,
    name='Positive'
))
fig.add_trace(go.Bar(
    x=[None], y=[None],
    marker=dict(color='red'),
    showlegend=True,
    name='Negative'
))

# Layout; y-axis label fixed (was "Absolute PCA1 Loadings" on a PCA3 chart)
fig.update_layout(
    title='<b>Top 30 Most Influential Features on PCA3</b>',
    xaxis_title='Features',
    yaxis_title='Absolute PCA3 Loadings',
    yaxis=dict(tickformat=".2f"),
    template='plotly_white',
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=1.02,
        xanchor="right",
        x=1
    ),
    margin=dict(l=20, r=20, b=20, t=60)
)

fig.show()

Write about how the scenarios differ based on PCA

Correlation Heatmap

Code
# Correlation of the numeric metric columns (position 8 onward),
# restricted to future (non-historical) RCP 8.5 records
corr_matrix = df_orig[(df_orig['TimePeriod'] != 'Hist') & (df_orig['RCP'] == 8.5)].iloc[:, 8:].corr()

# Interactive heatmap on a reversed red-blue diverging scale
fig = px.imshow(
    corr_matrix,
    labels=dict(color="Correlation"),
    x=corr_matrix.columns,
    y=corr_matrix.columns,
    color_continuous_scale='RdBu_r',
)

# Square canvas, tight margins, centered bold title
fig.update_layout(
    width=900,
    height=900,
    margin=dict(l=10, r=1, t=50, b=10),
    title_text='<b>Future Correlation Heatmap : RCP 8.5</b>',
    title_x=0.5,
)

# Smaller tick labels so all column names fit along both axes
fig.update_xaxes(tickfont=dict(size=10))
fig.update_yaxes(tickfont=dict(size=10))
fig.show()

RMSE

Code
# The two GCM scenarios being compared
sc40 = df_orig[df_orig['scenario'] == 'sc40']
sc37 = df_orig[df_orig['scenario'] == 'sc37']

# Numeric metric columns only (drop the leading id columns and trailing 3)
df1 = sc40.iloc[:, 8:-3]
df2 = sc37.iloc[:, 8:-3]

def standardize(df):
    """Return column-wise z-scores so the metrics are unit-comparable."""
    return df.apply(zscore)

# Standardize both scenarios independently
z_df1 = standardize(df1)
z_df2 = standardize(df2)

# Per-column RMSE between the standardized scenarios; rows are aligned
# positionally (both scenarios have the same number of points), hence the
# reset_index before subtracting.
rmse = np.sqrt(np.mean((z_df1.reset_index(drop=True) - z_df2.reset_index(drop=True)) ** 2, axis=0))

# The 30 metrics where the two scenarios disagree the most
rmse_sort = rmse.sort_values(ascending=False).head(30)

# Bar chart of the top-disagreement metrics
fig = go.Figure(data=[go.Bar(
    x=rmse_sort.keys(),
    y=rmse_sort.values,
    text=[round(i, 4) for i in rmse_sort.values],  # annotate bars with values
    textposition='inside',
    showlegend=False
)])

# Layout; y-axis label fixed (was mislabeled "Absolute PCA1 Loadings")
fig.update_layout(
    title='<b>Scenario 40 vs 37 RMSE of components</b>',
    xaxis_title='Features',
    yaxis_title='RMSE (z-scored)',
    yaxis=dict(tickformat=".2f"),
    template='plotly_white',
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=1.02,
        xanchor="right",
        x=1
    ),
    margin=dict(l=20, r=20, b=20, t=60)
)

fig.show()

RL, WetSoilDays_Spring_whole, WetSoilDays_Spring_Top50, Transp_Winter, Transp_Spring, SWA_Spring_Top50, SWA_Winter_Top50, PET_Spring, PET_Summer, PET_Fall, PET_Winter, NonDry_SWA_Winter, NonDry_SWA_Spring, ExtremeShortTermDryStress_Summer, Evap_Winter, TreeCanopy, Bare, Herb, Litter, Shrub, Sa, Cl, S, E

Code
# import plotly.express as px
# import plotly.graph_objects as go
# from plotly.subplots import make_subplots

# # List of columns to use for coloring
# color_columns = list(df_con.columns)

# # Creating the initial scatter plot with the first color column
# initial_color = color_columns[0]
# fig = px.scatter(
#     df_con[df_con['year'].isin(range(2060, 2099))],
#     x="PPT_Annual",
#     y="T_Annual",
#     # color=initial_color,
#     facet_col="RCP"
# )

# # Updating the layout to add the title
# fig.update_layout(
#     title={
#         'text': '<b>Annual Precipitation vs Temperature by RCP Scenarios</b>',
#         'x': 0.5,
#         'xanchor': 'center'
#     },
#     title_font=dict(size=20)
# )

# # Adding dropdown filter to change color based on selected column
# dropdown_buttons = [
#     {
#         'label': column,
#         'method': 'restyle',
#         'args': [
#             {'marker': {'color': df_con[column]}},  # Update the color
#             {},  # Leave empty to avoid breaking restyle
#             # [0]  # Apply to the first trace
#         ],
#         'args2': [
#             {'marker': {'color': df_con[initial_color]}},  # Revert to initial color if needed
#             {},
#             # [1]
#         ]
#     }
#     for column in color_columns
# ]

# fig.update_layout(
#     updatemenus=[
#         {
#             'buttons': dropdown_buttons,
#             'direction': 'down',
#             'showactive': True,
#             'x': 1,
#             'xanchor': 'center',
#             'y': 1.15,
#             'yanchor': 'top'
#         }
#     ]
# )

# # Show the figure
# fig.show()
Code
# Interactive scatter: Annual Precipitation vs Temperature, one subplot per
# RCP value, with a dropdown that recolors the points by any metric column.
# Assumes df_con is already loaded (defined in an earlier cell).
# NOTE(review): the positional slice below uses len(df_orig.columns) while
# slicing df_con — assumes the two frames share a column layout; confirm.
test = df_con.iloc[:,list(range(1, 3))+ [4,6] + list(range(8, len(df_orig.columns)-1))]
color_columns = list(test.columns)
rcp_values = test['RCP'].unique()

# One subplot title per RCP value, e.g. "RCP 4.5"
subplot_titles = [f'RCP {rcp}' for rcp in rcp_values]

# Side-by-side subplots sharing the temperature (y) axis
fig = make_subplots(rows=1, cols=len(rcp_values), shared_yaxes=True, subplot_titles=subplot_titles, horizontal_spacing=0.15)

# One trace per (color column, RCP) pair. Only the first color column's
# traces start visible; the dropdown below toggles visibility to "switch"
# the coloring variable.
for i, col in enumerate(color_columns):
    for j, rcp in enumerate(rcp_values):
        fig.add_trace(
            go.Scatter(
                # Years 2060-2099 for this RCP only
                x=test[(test['year'].isin(range(2060, 2100))) & (test['RCP'] == rcp)]['PPT_Annual'],
                y=test[(test['year'].isin(range(2060, 2100))) & (test['RCP'] == rcp)]['T_Annual'],
                mode='markers',
                marker=dict(
                    # Color each point by the currently selected column
                    color=test[(test['year'].isin(range(2060, 2100))) & (test['RCP'] == rcp)][col],
                    # Colorbar with 5 evenly spaced ticks spanning this
                    # column's min..max (rounded to 2 decimals); x places
                    # one colorbar next to each subplot (j is the subplot
                    # index).
                    colorbar=dict(
                        # title='Scale',
                                  tickmode='array',
                                  tickvals=[round(i,2) for i in np.linspace(start=round(min(test[(test['year'].isin(range(2060, 2100)) & (test['RCP'] == rcp))][col]),2),stop=round(max(test[(test['year'].isin(range(2060, 2100)) & (test['RCP'] == rcp))][col]),2),num=5)],
                                  ticktext=[round(i,2) for i in np.linspace(start=round(min(test[(test['year'].isin(range(2060, 2100)) & (test['RCP'] == rcp))][col]),2),stop=round(max(test[(test['year'].isin(range(2060, 2100)) & (test['RCP'] == rcp))][col]),2),num=5)],
                                  y=0.5,
                                  x= 0.43 + (j*0.58)
                                  ),
                                  colorscale='rdpu'
                ),
                name=col,
                # Only the first color column is visible initially
                visible=True if i == 0 else False,
                hovertemplate=(
                    f"<b>{col}</b><br>"
                    "Precipitation: %{x}<br>"
                    "Temperature: %{y}<br>"
                    "RCP: " + str(rcp) + "<br>"
                    "Value: %{marker.color}<br>"
                    "<extra></extra>"
                  )  # This hides the secondary box with trace info  # Only the first trace is visible initially
            ),
            row=1, col=j+1
        )

# Centered bold title; legend hidden because the colorbars carry the scale
fig.update_layout(
    title={
        'text': '<b>Annual Precipitation vs Temperature by RCP Scenarios</b>',
        'x': 0.5,
        'y': 0.97,
        'xanchor': 'center'
    },
    # title_font=dict(size=20),
    showlegend=False  # Hide legend since we are using colorbars
)

# Dropdown buttons: each button makes visible exactly the traces belonging
# to one color column (the flat visibility list mirrors the trace order
# produced by the nested loop above) and retitles the figure.
dropdown_buttons = [
    {
        'label': col,
        'method': 'update',
        'args': [
            {
                'visible': [col == color_column for color_column in color_columns for _ in rcp_values]
            },
            {
                'title': {'text': f'<b>Annual Precipitation vs Temperature by {col}</b>', 'x':0.5},
                'marker': {'colorbar': {'title': 'Scale'}}
            }
        ]
    }
    for col in color_columns
]

# Place the dropdown centered above the plots
fig.update_layout(
    updatemenus=[
        {
            'buttons': dropdown_buttons,
            'direction': 'down',
            'showactive': True,
            'x': 0.5,
            'xanchor': 'center',
            'y': 1.16,
            'yanchor': 'top'
        }
    ]
)

# Axis titles (shared y-axis is titled only once, on the first subplot)
fig.update_xaxes(title_text="Annual Precipitation", row=1, col=1)
fig.update_yaxes(title_text="Annual Temperature", row=1, col=1)
fig.update_xaxes(title_text="Annual Precipitation", row=1, col=2)

# Uniform styling for the subplot-title annotations
for annotation in fig['layout']['annotations']:
    annotation['font'] = {'size': 12, 'color': 'black'}

# Show the figure
fig.show()

For Useful Variables

Code
# Precipitation vs temperature for 2060-2098, colored by % bare ground
# and faceted by RCP scenario
fig = px.scatter(
    df_con[df_con['year'].isin(range(2060, 2099))],
    x="PPT_Annual",
    y="T_Annual",
    color="Bare",
    facet_col="RCP",
)

# Bold, centered title
fig.update_layout(
    title={
        'text': '<b>Annual Precipitation vs Temperature by RCP Scenarios</b>',
        'x': 0.5,
        'xanchor': 'center',
    },
    title_font=dict(size=20),
)
fig.show()
Code
import plotly.express as px

# Animated precipitation-vs-temperature scatter: one frame per year,
# points tracked across frames by Location_ID, sized by Bare, colored by
# RL, and faceted by RCP scenario.
animation_kwargs = dict(
    x="PPT_Annual",
    y="T_Annual",
    animation_frame="year",
    animation_group="Location_ID",
    size="Bare",
    color="RL",
    hover_name="long",
    facet_col="RCP",
    range_x=[0, 100],
    range_y=[0, 20],
)
fig = px.scatter(df_con, **animation_kwargs)
fig.show()
Code
# Correlation of the numeric metric columns (position 8 onward),
# restricted to historical records only
corr_matrix = df_orig[df_orig['TimePeriod'] == 'Hist'].iloc[:, 8:].corr()

# Interactive heatmap on a reversed red-blue diverging scale
fig = px.imshow(
    corr_matrix,
    labels=dict(color="Correlation"),
    x=corr_matrix.columns,
    y=corr_matrix.columns,
    color_continuous_scale='RdBu_r',
)

# Square canvas, tight margins, centered bold title
fig.update_layout(
    width=900,
    height=900,
    margin=dict(l=10, r=1, t=50, b=10),
    title_text='<b>History Correlation Heatmap</b>',
    title_x=0.5,
)

# Smaller tick labels so all column names fit along both axes
fig.update_xaxes(tickfont=dict(size=10))
fig.update_yaxes(tickfont=dict(size=10))
fig.show()
Code
# import plotly.graph_objects as go
# import pandas as pd

# # Example data setup (define your 'veg_location' DataFrame here)

# # Define color map
# color_map = {
#     'Forest': 'red',
#     'Shrubland': 'green',
#     'Grassland': 'blue',
#     'Woodland': 'pink'
# }

# # Apply the color map to the DataFrame
# veg_location['color'] = veg_location['veg'].map(color_map)

# # Create the figure
# fig = go.Figure()

# # Add traces for each year, one trace per year
# for year in sorted(veg_location['year'].unique()):
#     df_year = veg_location[veg_location['year'] == year]
#     fig.add_trace(
#         go.Scattermapbox(
#             lat=df_year['lat'],
#             lon=df_year['long'],
#             mode='markers',
#             marker=go.scattermapbox.Marker(
#                 size=df_year['T_Annual'],  # Size based on 'T_Annual'
#                 # size = 20,
#                 color=df_year['color'],  # Use predefined colors from the map
#             ),
#             name=str(year),
#             text=df_year['veg'],  # Vegetation type as hover text
#             # visible=False if year != veg_location['year'].min() else True
#         )
#     )

# # Set up the map layout
# fig.update_layout(
#     mapbox_style="open-street-map",
#     mapbox=dict(
#         center=go.layout.mapbox.Center(lat=37.6014, lon=-110.0138),
#         zoom=11
#     ),
#     title='Location of Each Vegetation',
#     margin=dict(l=10, r=10, t=50, b=10)
# )

# # Create slider
# steps = []
# years = sorted(veg_location['year'].unique())
# for i, year in enumerate(years):
#     step = dict(
#         method="update",
#         args=[{"visible": [False] * len(years)},
#               {"title": f"Location of Each Vegetation: {year}"}],
#         label=str(year)
#     )
#     step["args"][0]["visible"][i] = True  # Toggle i-th trace to "visible"
#     steps.append(step)

# sliders = [dict(
#     active=0,
#     currentvalue={"prefix": "Year: "},
#     pad={"t": 50},
#     steps=steps
# )]

# fig.update_layout(sliders=sliders)

# # Show the figure
# fig.show()
Code
# Load the FGDC metadata XML that documents each column of the data table
tree = ET.parse('Climate_Dought_Metrics_Metadata.xml')
root = tree.getroot()

# Collected "label:definition" strings, one per documented column
attributes = []

# Each 'attr' element under eainfo/detailed describes one column.
# findtext() returns the given default when the child element is absent
# and '' (never None) when it is present but empty — the previous
# find(...).text form could yield None and crash the concatenation below.
for attr in root.findall('.//eainfo/detailed/attr'):
    label = attr.findtext('attrlabl', 'No Label')
    definition = attr.findtext('attrdef', 'No Definition')
    attributes.append(label + ":" + definition)

attributes
['Park:This attribute (column name) in the data table represents the National Park unit name.',
 'long:This attribute (column name) in the data table represents the longitude coordinate for the point, Datum in WGS84.',
 'lat:This attribute (column name) in the data table represents the latitude coordinate for the point, Datum in WGS84.',
 'veg:This attribute (column name) in the data table represents the vegetation type for the sampled location.',
 'year:This attribute (column name) in the data table represents the year for the datapoint.',
 'TimePeriod:This attribute (column name) in the data table represents the time period for each data record.',
 'RCP:This attribute (column name) in the data table represents the representative concentration pathway.',
 'scenario:This attribute (column name) in the data table represents the general circulation model (GCM).',
 'treecanopy:This attribute (column name) in the data table represents the percentage of tree canopy coverage.',
 'Ann_Herb:This attribute (column name) in the data table represents the percentage of annual herbaceous coverage.',
 'Bare:This attribute (column name) in the data table represents the percentage of bare ground.',
 'Herb:This attribute (column name) in the data table represents the percent coverage of herbaceous plants.',
 'Litter:This attribute (column name) in the data table represents the percent coverage of plant litter.',
 'Shrub:This attribute (column name) in the data table represents the percent coverage of shrubs.',
 'El:This attribute (column name) in the data table represents elevation in meters.',
 'Sa:This attribute (column name) in the data table represents the percent sand in the soil.',
 'Cl:This attribute (column name) in the data table represents the percent clay in the soil.',
 'RF:This attribute (column name) in the data table represents the percent rock fragment in the soil.',
 'RL:This attribute (column name) in the data table represents the depth to restriction layer.',
 'Slope:This attribute (column name) in the data table represents the percent of slope. Slope gradient in degrees.',
 'E:This attribute (column name) in the data table represents the eastness aspect. Index of how east (10000) or west (-10000) a site faces',
 'S:This attribute (column name) in the data table represents the southness aspect. Index of how south (10000) or north (-10000) a site faces',
 'T_P_Corr:This attribute (column name) in the data table represents the monthly temperature and precipitation correlation.',
 'DrySoilDays_Winter_top50:This attribute (column name) in the data table represents the dry soil days in the top 50 cm of soil during winter',
 'DrySoilDays_Spring_top50:This attribute (column name) in the data table represents the dry soil days in the top 50 cm of soil during spring',
 'DrySoilDays_Summer_top50:This attribute (column name) in the data table represents the dry soil days in the top 50 cm of soil during summer',
 'DrySoilDays_Fall_top50:This attribute (column name) in the data table represents the dry soil days in the top 50 cm of soil during fall',
 'DrySoilDays_Winter_whole:This attribute (column name) in the data table represents the dry soil days in the entire soil profile during winter',
 'DrySoilDays_Spring_whole:This attribute (column name) in the data table represents the dry soil days in the entire soil profile during spring',
 'DrySoilDays_Summer_whole:This attribute (column name) in the data table represents the dry soil days in the entire soil profile during summer',
 'DrySoilDays_Fall_whole:This attribute (column name) in the data table represents the dry soil days in the entire soil profile during fall',
 'Evap_Winter:This attribute (column name) in the data table represents the evaporation during winter.',
 'Evap_Spring:This attribute (column name) in the data table represents the evaporation during spring.',
 'Evap_Summer:This attribute (column name) in the data table represents the evaporation during summer.',
 'Evap_Fall:This attribute (column name) in the data table represents the evaporation during fall.',
 'ExtremeShortTermDryStress_Winter_top50:This attribute (column name) in the data table represents the extreme short term dry stress for the top 50 cm of soil in the winter',
 'ExtremeShortTermDryStress_Spring_top50:This attribute (column name) in the data table represents the extreme short term dry stress for the top 50 cm of soil in the spring',
 'ExtremeShortTermDryStress_Summer_top50:This attribute (column name) in the data table represents the extreme short term dry stress for the top 50 cm of soil in the summer',
 'ExtremeShortTermDryStress_Fall_top50:This attribute (column name) in the data table represents the extreme short term dry stress for the top 50 cm of soil in the fall',
 'ExtremeShortTermDryStress_Winter_whole:This attribute (column name) in the data table represents the extreme short term dry stress for the entire soil profile in the winter',
 'ExtremeShortTermDryStress_Spring_whole:This attribute (column name) in the data table represents the extreme short term dry stress for the entire soil profile in the spring',
 'ExtremeShortTermDryStress_Summer_whole:This attribute (column name) in the data table represents the extreme short term dry stress for the entire soil profile in the summer',
 'ExtremeShortTermDryStress_Fall_whole:This attribute (column name) in the data table represents the extreme short term dry stress for the entire soil profile in the fall',
 'FrostDays_Winter:This attribute (column name) in the data table represents the count of frost days in the winter',
 'FrostDays_Spring:This attribute (column name) in the data table represents the count of frost days in the spring',
 'FrostDays_Summer:This attribute (column name) in the data table represents the count of frost days in the summer',
 'FrostDays_Fall:This attribute (column name) in the data table represents the count of frost days in the fall',
 'NonDrySWA_Winter_top50:This attribute (column name) in the data table represents the non-dry soil water availability in the top 50cm of soil during winter',
 'NonDrySWA_Spring_top50:This attribute (column name) in the data table represents the non-dry soil water availability in the top 50cm of soil during spring',
 'NonDrySWA_Summer_top50:This attribute (column name) in the data table represents the non-dry soil water availability in the top 50cm of soil during summer',
 'NonDrySWA_Fall_top50:This attribute (column name) in the data table represents the non-dry soil water availability in the top 50cm of soil during fall',
 'NonDrySWA_Winter_whole:This attribute (column name) in the data table represents the non-dry soil water availability during winter',
 'NonDrySWA_Spring_whole:This attribute (column name) in the data table represents the non-dry soil water availability during spring',
 'NonDrySWA_Summer_whole:This attribute (column name) in the data table represents the non-dry soil water availability during summer',
 'NonDrySWA_Fall_whole:This attribute (column name) in the data table represents the non-dry soil water availability during fall',
 'PET_Winter:This attribute (column name) in the data table represents the potential evapotranspiration in winter',
 'PET_Spring:This attribute (column name) in the data table represents the potential evapotranspiration in spring',
 'PET_Summer:This attribute (column name) in the data table represents the potential evapotranspiration in summer',
 'PET_Fall:This attribute (column name) in the data table represents the potential evapotranspiration in fall',
 'PPT_Winter:This attribute (column name) in the data table represents the precipitation in winter',
 'PPT_Spring:This attribute (column name) in the data table represents the precipitation in spring',
 'PPT_Summer:This attribute (column name) in the data table represents the precipitation in summer',
 'PPT_Fall:This attribute (column name) in the data table represents the precipitation in fall',
 'PPT_Annual:This attribute (column name) in the data table represents the precipitation annually',
 'SemiDryDuration_Annual_top50:This attribute (column name) in the data table represents the duration of the soil being semi-dry during the entire year in the top 50cm of soil',
 'SemiDryDuration_Annual_whole:This attribute (column name) in the data table represents the duration of the whole soil profile being semi-dry across an entire year',
 'SWA_Winter_top50:This attribute (column name) in the data table represents the soil water availability in the top 50 cm of soil during winter',
 'SWA_Spring_top50:This attribute (column name) in the data table represents the soil water availability in the top 50 cm of soil during spring',
 'SWA_Summer_top50:This attribute (column name) in the data table represents the soil water availability in the top 50 cm of soil during summer',
 'SWA_Fall_top50:This attribute (column name) in the data table represents the soil water availability in the top 50 cm of soil during fall',
 'SWA_Winter_whole:This attribute (column name) in the data table represents the soil water availability in the whole soil profile during winter',
 'SWA_Spring_whole:This attribute (column name) in the data table represents the soil water availability in the whole soil profile during spring',
 'SWA_Summer_whole:This attribute (column name) in the data table represents the soil water availability in the whole soil profile during summer',
 'SWA_Fall_whole:This attribute (column name) in the data table represents the soil water availability in the whole soil profile during fall',
 'T_Winter:This attribute (column name) in the data table represents the temperature in winter',
 'T_Spring:This attribute (column name) in the data table represents the temperature in spring',
 'T_Summer:This attribute (column name) in the data table represents the temperature in summer',
 'T_Fall:This attribute (column name) in the data table represents the temperature in fall',
 'T_Annual:This attribute (column name) in the data table represents the temperature annually',
 'Tmax_Winter:This attribute (column name) in the data table represents the maximum temperature in winter',
 'Tmax_Spring:This attribute (column name) in the data table represents the maximum temperature in spring',
 'Tmax_Summer:This attribute (column name) in the data table represents the maximum temperature in summer',
 'Tmax_Fall:This attribute (column name) in the data table represents the maximum temperature in fall',
 'Tmin_Winter:This attribute (column name) in the data table represents the minimum temperature in winter',
 'Tmin_Spring:This attribute (column name) in the data table represents the minimum temperature in spring',
 'Tmin_Summer:This attribute (column name) in the data table represents the minimum temperature in summer',
 'Tmin_Fall:This attribute (column name) in the data table represents the minimum temperature in fall',
 'Transp_Winter:This attribute (column name) in the data table represents the transpiration in winter',
 'Transp_Spring:This attribute (column name) in the data table represents the transpiration in spring',
 'Transp_Summer:This attribute (column name) in the data table represents the transpiration in summer',
 'Transp_Fall:This attribute (column name) in the data table represents the transpiration in fall',
 'VWC_Winter_top50:This attribute (column name) in the data table represents the volumetric water content in the top 50cm of soil during winter',
 'VWC_Spring_top50:This attribute (column name) in the data table represents the volumetric water content in the top 50cm of soil during spring',
 'VWC_Summer_top50:This attribute (column name) in the data table represents the volumetric water content in the top 50cm of soil during summer',
 'VWC_Fall_top50:This attribute (column name) in the data table represents the volumetric water content in the top 50cm of soil during fall',
 'VWC_Winter_whole:This attribute (column name) in the data table represents the volumetric water content in the whole soil profile during winter',
 'VWC_Spring_whole:This attribute (column name) in the data table represents the volumetric water content in the whole soil profile during spring',
 'VWC_Summer_whole:This attribute (column name) in the data table represents the volumetric water content in the whole soil profile during summer',
 'VWC_Fall_whole:This attribute (column name) in the data table represents the volumetric water content in the whole soil profile during fall',
 'WetSoilDays_Winter_top50:This attribute (column name) in the data table represents the count of days with "wet" soil, defined as any layer in the top 50cm of soil being "wet" (>-1.5MPa) during winter',
 'WetSoilDays_Spring_top50:This attribute (column name) in the data table represents the count of days with "wet" soil, defined as any layer in the top 50cm of soil being "wet" (>-1.5MPa) during spring',
 'WetSoilDays_Summer_top50:This attribute (column name) in the data table represents the count of days with "wet" soil, defined as any layer in the top 50cm of soil being "wet" (>-1.5MPa) during summer',
 'WetSoilDays_Fall_top50:This attribute (column name) in the data table represents the count of days with "wet" soil, defined as any layer in the top 50cm of soil being "wet" (>-1.5MPa) during fall',
 'WetSoilDays_Winter_whole:This attribute (column name) in the data table represents the number of wet soil days during winter',
 'WetSoilDays_Spring_whole:This attribute (column name) in the data table represents the number of wet soil days during spring',
 'WetSoilDays_Summer_whole:This attribute (column name) in the data table represents the number of wet soil days during summer',
 'WetSoilDays_Fall_whole:This attribute (column name) in the data table represents the number of wet soil days during fall']
Exam acronym Exam name Lab acronym Lab name Course designation
OSCP Offensive Security Certified Professional PWK Penetration Testing with Kali Linux PEN-200
OSWP Offensive Security Wireless Professional WA Wireless Attacks PEN-210
OSEP Offensive Security Experienced Penetration Tester ETBD Evasion Techniques and Breaching Defenses PEN-300
OSWA Offensive Security Web Assessor WAKL Web Attacks with Kali Linux WEB-200
OSWE Offensive Security Web Expert AWAE Advanced Web Attacks and Exploitation WEB-300
OSED Offensive Security Exploit Developer WUMED Windows User Mode Exploit Development EXP-301
OSMR Offensive Security macOS Researcher MCB macOS Control Bypasses EXP-312
OSEE Offensive Security Exploitation Expert AWE Advanced Windows Exploitation EXP-401
OSCE Offensive Security Certified Expert CTP Cracking the Perimeter N/A
OSDA Offensive Security Defense Analyst SODA Security Operations and Defensive Analysis SOC-200